By: Eugenia Poon, Kayla Zhu
Motivation: The worsening California wildfires of recent years have destroyed ecosystems, magnified air pollution and health risks, and strained labor and resources.
Goal: predict the risk/presence/spread of fire
Load and Clean Data
Feature Engineering
Modeling
%matplotlib inline
import os
import os.path
import random
import warnings
import zipfile
import geopandas as gpd
import numpy as np
import pandas as pd
import seaborn as sns
import tensorflow as tf
import xarray as xr
from IPython.display import HTML, Javascript, display
from imblearn.over_sampling import SMOTE
from imblearn.pipeline import Pipeline
from imblearn.under_sampling import RandomUnderSampler
from matplotlib import pyplot as plt
from plotly import express as px, graph_objects as go
from sklearn.metrics import (
accuracy_score,
f1_score,
mean_absolute_error,
mean_squared_error,
)
from sklearn.preprocessing import StandardScaler
from tensorflow.keras.layers import Dense, Input, Normalization
from tensorflow.keras.models import Model
from tensorflow.keras.utils import to_categorical
from xgboost import XGBClassifier
from wildfireFunc import (
getDF,
getFireRange,
getInter,
getNC,
plotInter,
plotRes,
model,
plot_loss,
score,
split,
)
# Global notebook configuration: silence warnings, set matplotlib defaults,
# widen the notebook display, and seed RNGs for reproducibility.
warnings.filterwarnings('ignore')
plt.rcParams['figure.figsize'] = (6, 4)
plt.rcParams['figure.dpi'] = 400
plt.rcParams['font.size'] = 5
plt.rcParams['figure.titlesize'] = 10
plt.rcParams['axes.linewidth'] = 0.1
plt.rcParams['patch.linewidth'] = 0
# stretch the notebook container to the full browser width
display(HTML("<style>.container { width:100% !important; }</style>"))
random.seed(100)         # python stdlib RNG
tf.random.set_seed(100)  # tensorflow RNG (weight init, shuffling)
| Sierra Nevada | Fire Perimeter | |
|---|---|---|
| SOURCE | Sierra Nevada Conservancy Boundary | CAL Fire |
| OPEN | View Data Source | View Data Source |
| OPEN | California Fire Perimeters (1950+) | |
| OPEN | Query | Query |
| Where: | 1=1 | YEAR_=2021 |
| Geometry Type: | Polygon | Polygon |
| Spatial Relationship: | Intersects | Intersects |
| Units: | Meters | Meters |
| Out fields: | * | * |
| Return Geometry: | True | True |
| Format: | GeoJSON | GeoJSON |
| Units: | Meters | Meters |
| CLICK | Query (GET) | Query (GET) |
# sierra nevada
# Fetch the Sierra Nevada Conservancy boundary polygon (GeoJSON) from the
# ArcGIS feature service and reproject to WGS84 lat/lon.
url = 'https://services1.arcgis.com/sTaVXkn06Nqew9yU/arcgis/rest/services/Sierra_Nevada_Conservancy_Boundary/FeatureServer/0/query?where=1%3D1&objectIds=&time=&geometry=&geometryType=esriGeometryPolygon&inSR=&spatialRel=esriSpatialRelIntersects&resultType=none&distance=&units=esriSRUnit_Meter&relationParam=&returnGeodetic=false&outFields=*&returnGeometry=true&returnCentroid=false&featureEncoding=esriDefault&multipatchOption=xyFootprint&maxAllowableOffset=&geometryPrecision=&outSR=&defaultSR=&datumTransformation=&applyVCSProjection=false&returnIdsOnly=false&returnUniqueIdsOnly=false&returnCountOnly=false&returnExtentOnly=false&returnQueryGeometry=false&returnDistinctValues=false&cacheHint=false&orderByFields=&groupByFieldsForStatistics=&outStatistics=&having=&resultOffset=&resultRecordCount=&returnZ=false&returnM=false&returnExceededLimitFeatures=true&quantizationParameters=&sqlFormat=none&f=pgeojson&token='
snBoundaries = gpd.read_file(url).to_crs(epsg=4326)
snBounds = snBoundaries.geometry.bounds
# Study area: clip to the northern portion of the range. West/south/north
# edges are hard-coded; the east edge comes from the SNC boundary extent.
bounds = [-122, 39.35, snBounds.iloc[0].maxx, 41]
boundaries = gpd.clip(snBoundaries, bounds)
# NOTE: `bounds` is rebound here from a list to the clipped extent DataFrame
bounds = boundaries.geometry.bounds
display(snBounds)
# fire perimeter
# CAL FIRE historical perimeters; one request per year (YEAR_ filter is
# spliced between url1 and url2).
url1 = 'https://services1.arcgis.com/jUJYIo9tSA7EHvfZ/ArcGIS/rest/services/California_Fire_Perimeters/FeatureServer/2/query?where=YEAR_%3D'
url2 = '&objectIds=&time=&geometry=&geometryType=esriGeometryPolygon&inSR=&spatialRel=esriSpatialRelIntersects&resultType=none&distance=&units=esriSRUnit_Meter&relationParam=&returnGeodetic=false&outFields=*&returnGeometry=true&returnCentroid=false&featureEncoding=esriDefault&multipatchOption=xyFootprint&maxAllowableOffset=&geometryPrecision=&outSR=&defaultSR=&datumTransformation=&applyVCSProjection=false&returnIdsOnly=false&returnUniqueIdsOnly=false&returnCountOnly=false&returnExtentOnly=false&returnQueryGeometry=false&returnDistinctValues=false&cacheHint=false&orderByFields=&groupByFieldsForStatistics=&outStatistics=&having=&resultOffset=&resultRecordCount=&returnZ=false&returnM=false&returnExceededLimitFeatures=true&quantizationParameters=&sqlFormat=none&f=pgeojson&token='
years = ['2020', '2021']
firePer = pd.concat([gpd.read_file(url1 + y + url2) for y in years])
dateCol = ['ALARM_DATE', 'CONT_DATE']
# keep California fires that have both an alarm and a containment date
firePer = firePer[firePer.STATE == 'CA'].dropna(subset=dateCol)
firePer.geometry = firePer.geometry.make_valid() # 2021 has invalid geometry
firePer = gpd.clip(firePer, boundaries) # fires in area
# epoch milliseconds -> pandas datetimes
firePer[dateCol] = firePer[dateCol].apply(pd.to_datetime, unit='ms')
firePer = firePer.sort_values('ALARM_DATE').reset_index(drop=True)
print(f'{len(firePer)} fires')
display(firePer.sort_values('Shape__Area')[
    ['FIRE_NAME', 'Shape__Area']].tail())
| minx | miny | maxx | maxy | |
|---|---|---|---|---|
| 0 | -123.039382 | 35.064862 | -117.735384 | 41.994919 |
64 fires
| FIRE_NAME | Shape__Area | |
|---|---|---|
| 26 | LOYALTON | 3.198601e+08 |
| 41 | W-5 COLD SPRINGS | 6.049077e+08 |
| 58 | SUGAR | 7.236506e+08 |
| 34 | NORTH COMPLEX | 2.183903e+09 |
| 59 | DIXIE | 6.692866e+09 |
# Map the full SNC boundary (light grey), the clipped study area (darker
# grey), and the 2020-2021 fire perimeters colored by year.
fig, ax = plt.subplots(dpi=600)
snBoundaries.plot(ax=ax, color='grey', alpha=0.1, linewidth=0)
boundaries.plot(ax=ax, color='grey', alpha=0.3, linewidth=0)
firePer.plot('YEAR_', legend=True, alpha=0.6, linewidth=0, ax=ax)
ax.tick_params(axis='both', labelsize=5)
ax.set(xlabel='longitude', ylabel='latitude')
plt.show()
| variable | var | units | description |
|---|---|---|---|
| relative_humidity | rmin | % | minimum relative humidity |
| air_temperature | tmmx | K | maximum temperature |
| wind_speed | vs | m/s | wind velocity at 10m |
| burning_index_g | bi | NFDRS fire danger index | burning index |
# GridMET daily weather for the study area, cached as a zipped netCDF.
path = 'data/gm.nc'
if os.path.isfile(path+'.zip') == False: # ~3 minutes
    # download/subset via the project helper, then remove the raw file
    # (the helper presumably writes the .zip — TODO confirm in wildfireFunc)
    getNC(path, bounds, boundaries, years)
    !rm data/gm.nc
ds = xr.open_dataset(zipfile.ZipFile(path+'.zip').open('gm.nc'))
print(ds.dims)
Frozen({'lon': 52, 'lat': 44, 'day': 731})
# Optionally interpolate the grid to a finer resolution via getInter.
factor = 1  # 1 = keep the native spacing (~0.0417 degrees per cell)
dd = getInter(ds, factor)
# sanity check: spacing should be uniform in both lon and lat
print(np.unique(abs(np.diff(dd.lon.to_numpy()))),
      np.unique(abs(np.diff(dd.lat.to_numpy()))))
print()
res = abs(np.diff(dd.lon.to_numpy()))[0]  # grid cell size in degrees
display(dd)
[0.04166667 0.04166667] [0.04166667 0.04166667]
<xarray.Dataset>
Dimensions: (lon: 52, lat: 44, day: 731)
Coordinates:
* lon (lon) float64 -122.1 -122.0 -122.0 ... -120.0 -119.9
* lat (lat) float64 41.07 41.03 40.98 ... 39.36 39.32 39.28
* day (day) datetime64[ns] 2020-01-01 2020-01-02 ... 2021-12-31
Data variables:
burning_index_g (day, lat, lon) float32 ...
relative_humidity (day, lat, lon) float32 ...
air_temperature (day, lat, lon) float32 ...
wind_speed (day, lat, lon) float32 ...
Attributes:
units: Unitless
description: BI-G
long_name: bi
standard_name: bi
dimensions: lon lat time
grid_mapping: crs
coordinate_system: WGS84,EPSG:4326

plotInter(firePer, ds)
# Flatten the (day, lat, lon) cube into a DataFrame with a per-cell/day
# fire indicator (built by getDF from the perimeters); cached as a zipped
# pickle.
path = 'data/gm.pkl'
if os.path.isfile(path+'.zip') == False:
    getDF(path, res, dd, firePer)
    !rm data/gm.pkl
df = pd.read_pickle(zipfile.ZipFile(path+'.zip').open('gm.pkl'))
df['coor'] = df.groupby(['lat', 'lon']).ngroup()  # unique id per grid cell
display(df.info(verbose=False))
<class 'pandas.core.frame.DataFrame'> RangeIndex: 1319455 entries, 0 to 1319454 Columns: 10 entries, lon to coor dtypes: datetime64[ns](1), float32(4), float64(2), int64(3) memory usage: 80.5 MB
None
# get data for 3 days before
# For each grid cell, append lagged copies of the weather columns
# (numeric suffix = days back, 1-3), then backfill the leading NaNs so the
# earliest days keep usable rows.
fire = df.copy()
cols = ['burning_index_g', 'relative_humidity',
        'air_temperature', 'wind_speed']
shifted = []
for i in range(1, 4):
    lag = fire.groupby('coor')[cols].shift(i)
    shifted.append(lag.add_suffix(str(i)))
# .bfill() replaces the deprecated fillna(method='bfill') (removed in pandas 3)
shifted = pd.concat(shifted, axis=1).bfill()
fire = pd.concat([fire, shifted], axis=1) # keep current
# fire = pd.concat([fire.drop(columns=cols), shifted], axis=1) # remove current
fire = fire.reindex(sorted(fire.columns), axis=1)
For every coordinate on day x, gather data from the surrounding coordinates:
ll = df[['lat', 'lon']].drop_duplicates()  # unique grid coordinates


def getSurrounding(lat, lon, ll=ll, fire=fire):
    """Return rows of `fire` for the grid cells adjacent to (lat, lon),
    relabelled with the center cell's coordinates.

    Uses the module-level grid resolution `res` (degrees) to bound the
    3x3 neighborhood. NOTE: `ll` and `fire` defaults bind at definition
    time, as in the original.
    """
    n, s = lat + res, lat - res
    e, w = lon + res, lon - res
    # candidate coordinates within one cell of the center
    ll = ll[(ll.lat <= n) & (ll.lat >= s) & (ll.lon <= e) & (ll.lon >= w)]
    # .copy() so the relabelling below does not trigger SettingWithCopy
    temp = fire[fire.lat.isin(ll.lat) & fire.lon.isin(ll.lon)].copy()
    # remove center: match the exact (lat, lon) pair. The original used a
    # lat + lon**2 hash, which can collide for distinct coordinate pairs.
    temp = temp[~((temp.lat == lat) & (temp.lon == lon))]
    temp['lat'] = lat
    temp['lon'] = lon
    return temp
# get surrounding coordinates
# Drop the 3-day-lag columns (only 1- and 2-day lags feed the neighborhood
# features), then stack every cell's neighbor rows.
f = fire.loc[:, ~fire.columns.str.endswith('3')]
ex = pd.concat([getSurrounding(lat=g[0][0], lon=g[0][1])
                for g in f.groupby(['lat', 'lon'])])
# get extreme data for 2 days before (surrounding)
# Worst-case neighbor conditions per cell/day: hottest, highest burning
# index, driest, windiest.
extreme = (ex.groupby(['lat', 'lon', 'day'])
           .agg(at1=('air_temperature1', max),
                at2=('air_temperature2', max),
                bi1=('burning_index_g1', max),
                bi2=('burning_index_g2', max),
                rh1=('relative_humidity1', min),
                rh2=('relative_humidity2', min),
                ws1=('wind_speed1', max),
                ws2=('wind_speed2', max),))
# Join the neighborhood extremes onto each cell's own lagged data and drop
# columns that are not features for models 2-4.
extreme = (f.merge(extreme.reset_index(), on=['lat', 'lon', 'day'])
           .drop(columns=['fire', 'lon', 'lat', 'relative_humidity',
                          'air_temperature', 'wind_speed']))
Burning index pg25 or 35 (Table)
| BI-1978 | Narrative Comments |
|---|---|
| 0-30 | Most prescribed burns are conducted in this range. |
| 30-40 | Generally represent the limit of control for direct attack methods. |
| 40-60 | Machine methods usually necessary or indirect attack should be used |
| 60-80 | The prospects for direct control by any means are poor above this intensity. |
| 80-90 | The heat load on people within 30 feet of the fire is dangerous. |
| 90-110+ | Above this intensity, spotting, fire whirls, and crowning should be expected. |
# Bin the burning index into the six NFDRS danger classes from the table
# above, then one-hot encode the class and append it as new columns.
bins = [-1, 30, 40, 60, 80, 90, max(extreme['burning_index_g'])+1]
labels = np.arange(len(bins)-1)
binned = pd.cut(extreme['burning_index_g'], bins=bins, labels=labels)
danger = to_categorical(binned)
extreme = pd.concat([extreme, pd.DataFrame(danger)], axis=1)
# Subsample both feature tables for faster model fitting, then plot
# standardized 15-day rolling means of the daily-average weather variables.
S = 100
fireS = fire.sample(n=100000, random_state=S)
extremeS = extreme.sample(n=100000, random_state=S)
daily = df.groupby('day').mean(numeric_only=True).drop(columns='fire')
scaled = pd.DataFrame(StandardScaler().fit_transform(daily),
                      columns=daily.columns, index=daily.index)
bb = scaled[cols].rolling(15).mean().reset_index()
px.line(bb, x=bb.day, y=cols).update_layout(margin=dict(t=0, b=0, r=0, l=0))
# Overlaid percent histograms: per-cell weather variables (sampled data),
# fire duration in days, and fire start month. All traces start hidden
# except the first; toggle them via the legend.
col = ['burning_index_g', 'relative_humidity', 'air_temperature', 'wind_speed']
n = ['fire_length', 'start_month']
fig1 = px.histogram(fireS, x=col, histnorm='percent')
# fire duration: length of each perimeter's alarm->containment date range
fig2 = px.histogram(x=[len(x) for x in getFireRange(firePer)],
                    histnorm='percent')
fig2.update_traces(dict(name=n[0], offsetgroup=n[0], legendgroup=n[0],
                        marker_color=px.colors.qualitative.Plotly[5],
                        showlegend=True))
fig3 = px.histogram(x=firePer.ALARM_DATE.dt.month, histnorm='percent')
fig3.update_traces(dict(name=n[1], offsetgroup=n[1], legendgroup=n[1],
                        marker_color=px.colors.qualitative.Plotly[6],
                        showlegend=True))
# merge all traces into a single overlaid figure
fig = go.Figure(data=fig1.data+fig2.data+fig3.data)
fig.update_layout(margin=dict(t=15, b=0, r=0, l=0),
                  barmode='overlay', bargap=0.1)
fig.update_traces(visible='legendonly', opacity=0.7)
fig.data[0].visible = True
fig.show()
| Idea | predict if each coordinate had a fire on a day |
| Data | independent: indicator for fire using dates and coordinates given by fire perimeter |
| dependent: weather of the previous 3 days and others | |
| Metric | F1 considers precision and recall and not true negatives (no fire) |
| Maximize true positives or correctly predicting yes fires because incorrectly predicting yes fire as no fire is dangerous | |
| Issue | perimeters include all coordinates that had fire throughout one or more days |
| for fires that span multiple days, some coordinates might not have any fire until some days after the start of the fire | |
| model will still indicate fire even if weather conditions are normal |
# Model 1: classify fire / no fire per cell-day using only the lagged
# weather (same-day weather columns are dropped).
target = ['fire']
temp = fireS.drop(columns=['burning_index_g', 'relative_humidity',
                           'air_temperature', 'wind_speed'])
X1, X_test1, y1, y_test1, X_train1, X_val1, y_train1, y_val1 = split(
    temp, target)
print(f'{np.mean(y1) * 100:.2f}% yes fire\n')  # class-imbalance check
# fit the suite of baseline classifiers from wildfireFunc
mm1 = model(X_train1, y_train1, X_val1, y_val1, method='c')
4.17% yes fire Logistic Regression - Decision Tree - LDA - LightGBM - XGBoost -
| F1 | TPR | FPR | Precision | logloss | |
|---|---|---|---|---|---|
| XGBoost | 0.677863 | 0.595174 | 0.006955 | 0.787234 | 0.845023 |
| LightGBM | 0.616260 | 0.508043 | 0.006086 | 0.783058 | 0.945145 |
| Decision Tree | 0.523975 | 0.505362 | 0.018315 | 0.544012 | 1.371661 |
| Logistic Regression | 0.169446 | 0.762735 | 0.313029 | 0.095310 | 11.169528 |
| LDA | 0.015584 | 0.008043 | 0.001043 | 0.250000 | 1.517838 |
Model is not expected to perform better because of the previously mentioned issues
Class Imbalance
For the training data
# Rebalance the training split: SMOTE the minority (fire) class up to a
# 1:10 ratio, then randomly undersample the majority down to 2:1, and
# refit XGBoost on the rebalanced data.
resampler = Pipeline(steps=[('over', SMOTE(sampling_strategy=0.1)),
                            ('under', RandomUnderSampler(sampling_strategy=0.5))
                            ])
X_train1_, y_train1_ = resampler.fit_resample(X_train1, y_train1)
xgb1 = XGBClassifier(eval_metric='aucpr', random_state=100)
xgb1.fit(X_train1_, y_train1_)
_, _, _, _, _ = score(y_val1, xgb1.predict(X_val1), method='c', p=True)
f1: 0.5904, TPR: 0.9062, FPR: 0.0503, Precision: 0.4378
| Idea | predict the risk of fire based on prior nearby conditions |
| Data | independent: categorized burn index |
| dependent: weather from 2 days before and worst weather conditions of nearby values | |
| Metric | F1 |
| Issue | categorization may not be descriptive enough but may be useful for identifying next-day dangers assuming that fires are environmentally caused or continuous |
# Model 2: multiclass danger-category classification with XGBoost.
target = list(labels)  # the one-hot danger columns 0..5
temp = extremeS.drop(columns='burning_index_g')
X, X_test, yC, y_testC, X_train, X_val, y_trainC, y_valC = split(temp, target)
# the resamplers need integer class labels, not one-hot columns
y_trainC = np.argmax(y_trainC, axis=1)
pipeline = Pipeline(steps=[('over', SMOTE()),
                           ('under', RandomUnderSampler())])
X_train_, y_trainC_ = pipeline.fit_resample(X_train, y_trainC)
# convert back to one-hot frames for the classifier / downstream metrics
y_trainC = pd.DataFrame(to_categorical(y_trainC), index=X_train.index)
y_trainC_ = pd.DataFrame(to_categorical(y_trainC_), index=X_train_.index)
xgbC = XGBClassifier(eval_metric='aucpr', random_state=100)
xgbC.fit(X_train_, y_trainC_)
print(f"F1={f1_score(y_valC, xgbC.predict(X_val), average='weighted'):.3f}")
print(f'Acc={accuracy_score(y_valC, xgbC.predict(X_val)):.3f}')
# row-normalized confusion matrix: actual vs predicted class
sns.heatmap(pd.crosstab(np.argmax(y_valC, axis=1),
                        np.argmax(xgbC.predict(X_val), axis=1),
                        rownames=['Actual'], colnames=['Predicted'],
                        margins=True, normalize='index'
                        ), cmap='Blues', annot=True)
plt.show()
F1=0.735 Acc=0.656
| Idea | predict the spread of fire based on prior nearby conditions |
| Data | independent: burn index |
| dependent: weather from 2 days before and worst weather conditions of nearby values | |
| Metric | RMSE |
| Issue | difficult to correctly predict zeros |
# Model 3: regression on the raw burning index.
target = ['burning_index_g']
temp = extremeS.drop(columns=labels)  # drop the one-hot danger columns
X, X_test, yR, y_testR, X_train, X_val, y_trainR, y_valR = split(temp, target)
# fit the suite of baseline regressors from wildfireFunc
mmR = model(X_train, y_trainR, X_val, y_valR, method='r')
plotRes(y_valR, mmR['XGBoost'][0].predict(X_val))  # residuals of best model
Linear Regression - Decision Tree - LightGBM - XGBoost -
| rmse | mae | |
|---|---|---|
| XGBoost | 13.303519 | 9.051157 |
| LightGBM | 13.791284 | 9.456384 |
| Linear Regression | 15.969732 | 10.757834 |
| Decision Tree | 17.770387 | 10.049889 |
| Idea | predict the spread and risk of fire based on prior nearby conditions |
| Data | independent: burn index, categorized burn index |
| dependent: weather from 2 days before and worst weather conditions of nearby values | |
| Metric | RMSE, F1 |
| Issue | same as model 2, 3 |
def modelNN(X_train, y_trainC):
    """Build a two-headed dense network: a linear head for burning-index
    regression and a softmax head for danger classification, sharing two
    512-unit hidden layers over normalized inputs."""
    # normalization layer adapted to the training features
    norm = Normalization(axis=-1)
    norm.adapt(np.array(X_train))
    inputs = Input(shape=(X_train.shape[1],))
    x = norm(inputs)
    x = Dense(512, activation='relu', kernel_initializer='he_normal')(x)
    x = Dense(512, activation='relu', kernel_initializer='he_normal')(x)
    out_reg = Dense(1, activation='linear')(x)  # regression head
    out_cls = Dense(y_trainC.shape[1], activation='softmax')(x)  # class head
    nn = Model(inputs=inputs, outputs=[out_reg, out_cls])
    nn.compile(loss=['mse', 'categorical_crossentropy'], optimizer='adam',
               metrics=['RootMeanSquaredError', 'F1Score'])
    return nn
%%time
# Model 4: train the two-headed network on the sampled training data,
# holding out 30% of it for the validation-loss curves.
nn4 = modelNN(X_train, y_trainC)
history = nn4.fit(X_train, [y_trainR, y_trainC], validation_split=0.3,
                  epochs=50, batch_size=64, verbose=0)
y_predR, y_predC = nn4.predict(X_val, verbose=0)
y_predC = to_categorical(np.argmax(y_predC, axis=1))  # harden to one-hot
plot_loss(history)
CPU times: user 1min 22s, sys: 23.1 s, total: 1min 45s Wall time: 49.7 s
print('Regression: ', end=' ')
print(f'MAE={mean_absolute_error(y_valR, y_predR):.3f}', end=' ')
print(f'RMSE={mean_squared_error(y_valR, y_predR, squared=False):.3f}')
print('Classification:', end=' ')
print(f"F1={f1_score(y_valC, y_predC, average='weighted'):.3f}", end=' ')
print(f'Acc={accuracy_score(y_valC, y_predC):.3f}')
plotRes(y_valR, y_predR)
sns.heatmap(pd.crosstab(np.argmax(y_valC, axis=1), np.argmax(y_predC, axis=1),
rownames=['Actual'], colnames=['Predicted'],
margins=True, normalize='index'
), cmap='Blues', annot=True)
plt.title('Confusion Matrix')
plt.show()
Regression: MAE=8.286 RMSE=12.186 Classification: F1=0.745 Acc=0.748
def perform_bootstrap(X_test, y_testR, y_testC, models: dict, samp=500):
    """Bootstrap test-set metrics for several fitted models.

    `models` maps a display name to [[fitted_model, 'c' or 'r'], results];
    `results` accumulates [acc, f1] for classifiers and the (rmse, mae)
    tuple from score() for regressors. Keras models are detected by their
    predict(..., verbose=0) signature; sklearn-style models raise
    TypeError on the `verbose` kwarg and fall back to a plain predict.
    Returns the same dict with results filled in.
    """
    np.random.seed(100)  # reproducible resampling
    for bs_iter in range(samp):
        # resample row indices with replacement
        bs_index = np.random.choice(
            X_test.index, len(X_test.index), replace=True)
        bs_data = X_test.loc[bs_index]
        bs_testR = y_testR.loc[bs_index]
        bs_testC = y_testC.loc[bs_index]
        # `entry` avoids the original loop variable `model`, which shadowed
        # the imported model() helper
        for name, entry in models.items():
            fitted, kind = entry[0]
            if kind == 'c':
                try:
                    # keras: two-headed output; take the softmax head
                    bs_pred = fitted.predict(bs_data, verbose=0)
                    bs_pred = to_categorical(np.argmax(bs_pred[1], axis=1))
                except TypeError:  # was a bare except; catch only sklearn's
                    bs_pred = fitted.predict(bs_data)
                acc = accuracy_score(bs_testC, bs_pred)
                f1 = f1_score(bs_testC, bs_pred, average='weighted')
                entry[1].append([acc, f1])
            elif kind == 'r':
                try:
                    # keras: take the regression head
                    bs_pred = fitted.predict(bs_data, verbose=0)
                    bs_pred = bs_pred[0]
                except TypeError:
                    bs_pred = fitted.predict(bs_data)
                entry[1].append([*score(bs_testR, bs_pred, method='r')])
    return models
%%time
# Bootstrap the test set 500 times for models 2-4; the neural network
# appears twice, once per output head.
m = {
    'XGBClassifier': [[xgbC, 'c'], []],  # model 2
    'XGBRegressor': [[mmR['XGBoost'][0], 'r'], []],  # model 3
    'Neural Network Class': [[nn4, 'c'], []],  # model 4, class head
    'Neural Network Reg': [[nn4, 'r'], []],  # model 4, regression head
}
bs = perform_bootstrap(X_test, y_testR, y_testC, models=m, samp=500)
CPU times: user 24min, sys: 4min 51s, total: 28min 52s Wall time: 16min 34s
# Bootstrap: Classification
# Histograms of bootstrap accuracy (left) and F1 (right) for the two
# classification models (dict positions 0 and 2).
fig, ax = plt.subplots(nrows=1, ncols=2)
for i in [0, 2]:
    scores = np.array(list(bs.values())[i][1])
    sns.histplot(scores[:, 0], edgecolor='none',
                 color=sns.color_palette()[i], alpha=0.4, ax=ax[0])
    sns.histplot(scores[:, 1], edgecolor='none',
                 color=sns.color_palette()[i], alpha=0.4, ax=ax[1])
fig.suptitle('Bootstrap: Classification')
ax[0].set_title('Accuracy')
ax[1].set_title('F1')
plt.legend(title='Model', labels=list(bs.keys())[0:3:2],
           bbox_to_anchor=(1, 1.02), loc='upper left')
plt.show()
# Bootstrap: Regression
# Histograms of bootstrap RMSE (left) and MAE (right) for the two
# regression models (dict positions 1 and 3).
fig, ax = plt.subplots(nrows=1, ncols=2)
for i in [1, 3]:
    scores = np.array(list(bs.values())[i][1])
    sns.histplot(scores[:, 0], edgecolor='none',
                 color=sns.color_palette()[i], alpha=0.4, ax=ax[0])
    sns.histplot(scores[:, 1], edgecolor='none',
                 color=sns.color_palette()[i], alpha=0.4, ax=ax[1])
fig.suptitle('Bootstrap: Regression')
ax[0].set_title('RMSE')
ax[1].set_title('MAE')
plt.legend(title='Model', labels=list(bs.keys())[1:4:2],
           bbox_to_anchor=(1, 1.02), loc='upper left')
plt.show()
Neural network scores are better compared to XGBoost models
# Rebuild both splits on the FULL extreme dataset (not the sample).
# NOTE: X/X_test/X_train/X_val are overwritten by the second call; the
# feature rows only line up across yC and yR if split() produces the same
# row partition for both calls — presumably it seeds internally; verify
# against wildfireFunc.
target = list(labels)
temp = extreme.drop(columns='burning_index_g')
X, X_test, yC, y_testC, X_train, X_val, y_trainC, y_valC = split(temp, target)
target = ['burning_index_g']
temp = extreme.drop(columns=labels)
X, X_test, yR, y_testR, X_train, X_val, y_trainR, y_valR = split(temp, target)
%%time
# Final model: retrain the two-headed network on the full (non-test) data
# with a 40% validation split, then predict on the held-out test set.
model = modelNN(X, yC)  # NOTE: rebinds the imported model() helper name
history = model.fit(X, [yR, yC], validation_split=0.4,
                    epochs=50, batch_size=128, verbose=0)
y_predR, y_predC = model.predict(X_test, verbose=0)
y_predC = to_categorical(np.argmax(y_predC, axis=1))  # harden to one-hot
plot_loss(history)
CPU times: user 17min 25s, sys: 3min 8s, total: 20min 33s Wall time: 7min 9s
print('Regression: ', end=' ')
print(f'MAE={mean_absolute_error(y_testR, y_predR):.3f}', end=' ')
print(f'RMSE={mean_squared_error(y_testR, y_predR, squared=False):.3f}')
print('Classification:', end=' ')
print(f"F1={f1_score(y_testC, y_predC, average='weighted'):.3f}", end=' ')
print(f'Acc={accuracy_score(y_testC, y_predC):.3f}')
plotRes(y_testR, y_predR)
sns.heatmap(pd.crosstab(np.argmax(y_testC, axis=1), np.argmax(y_predC, axis=1),
rownames=['Actual'], colnames=['Predicted'],
margins=True, normalize='index'
), cmap='Blues', annot=True)
plt.title('Confusion Matrix')
plt.show()
Regression: MAE=5.028 RMSE=7.887 Classification: F1=0.821 Acc=0.827
Although the neural network performed better, a persistent error involving zero burn index appeared in each tested model. This error could be due to the unpredictability of short-lived and/or unnatural fires using GridMET's daily aggregate data. Daily data does not allow for adequate prediction of these fires because unnatural fires may start abruptly regardless of weather conditions. For this reason, a finer temporal resolution by minutes or seconds may be more useful for detecting sudden changes. Furthermore, the acquired data does not include any topographic information (e.g., elevation and slope) that could be beneficial.
# Housekeeping: save the notebook, export an HTML copy to the project
# website, tidy the .ipynb, and restart the kernel.
# save notebook
display(Javascript('IPython.notebook.save_checkpoint();'))
# save notebook as html to eugpoon.github.io/projects
!jupyter nbconvert wildfire.ipynb --to html
%mv "wildfire.html" "../eugpoon.github.io/projects/"
# restyle imports, clear output, replace file
!cleanipynb wildfire.ipynb
# restart kernel
display(HTML("<script>Jupyter.notebook.kernel.restart()</script>"))